library(readxl)
library(sjmisc)
library(moments)
library(corrr)
library(broom)
library(tidyverse)
library(sjlabelled)
library(texreg)
library(xtable)
library(haven)
library(lmtest)
library(grid)
library(gridExtra)
library(foreign)
library(readstata13)

# Strongly prefer fixed notation over scientific notation when printing.
options(scipen = 999)

# Set the user-specific root folder of the replication package.
ruta <- "C:/Users/Matias/Dropbox/Proyectos/Fondecyt Participacion Electoral/Articulos/Electoral Winners y Plebiscito/Harvard Dataverse/1. Presidential Election (study 1)/"

# Load the five survey waves. Wave 3 is read with read.dta13() (readstata13);
# every other wave is read with read_dta() (haven).
ola_1 <- paste0(ruta, "2. Database/Wave1_Study1.dta") %>%
  read_dta()
ola_2 <- paste0(ruta, "2. Database/Wave2_Study1.dta") %>%
  read_dta()
ola_3 <- paste0(ruta, "2. Database/Wave3_Study1.dta") %>%
  read.dta13()
ola_4 <- paste0(ruta, "2. Database/Wave4_Study1.dta") %>%
  read_dta()
ola_5 <- paste0(ruta, "2. Database/Wave5_Study1.dta") %>%
  read_dta()

## Meta-Datos

# Build a variable-level metadata table (codebook) for a data set.
#
# Args:
#   base: A data frame whose columns may carry a "label" attribute (variable
#     label) and a "labels" attribute (named vector of value labels).
#
# Returns:
#   A tibble with one row per column of `base` and the columns:
#     "Nombre"                   - column name.
#     "Etiqueta de la variable"  - variable label, NA when absent.
#     "Clase"                    - class vector collapsed with ", "
#                                  (single-class columns keep the bare class
#                                  name, e.g. "character" or "Date").
#     "Etiquetas de los valores" - value labels as "value = label" pairs
#                                  joined by "; ", NA when absent.
#     "Valores distintos"        - number of distinct values (double).
get.meta <- function(base) {
  columnas <- lapply(names(base), function(var) getElement(base, var))

  etiqueta_variable <- vapply(columnas, function(x) {
    # exact = TRUE: attr() partially matches attribute names by default, so
    # asking for "label" on a column that only has "labels" would otherwise
    # return the value labels.
    etiqueta <- attr(x, "label", exact = TRUE)
    if (is.null(etiqueta)) {
      NA_character_
    } else {
      paste(as.character(etiqueta), collapse = " ")
    }
  }, character(1))

  # class() can return several entries; collapse them so every column yields
  # exactly one readable string.
  clase <- vapply(columnas, function(x) {
    paste(class(x), collapse = ", ")
  }, character(1))

  etiquetas_valores <- vapply(columnas, function(x) {
    valores <- attr(x, "labels", exact = TRUE)
    if (is.null(valores)) {
      return(NA_character_)
    }
    nombres <- names(valores)
    if (is.null(nombres)) {
      paste(as.character(valores), collapse = "; ")
    } else {
      paste0(unname(valores), " = ", nombres, collapse = "; ")
    }
  }, character(1))

  valores_distintos <- vapply(columnas, function(x) {
    as.numeric(dplyr::n_distinct(x))
  }, numeric(1))

  tibble::tibble(
    "Nombre" = names(base),
    "Etiqueta de la variable" = etiqueta_variable,
    "Clase" = clase,
    "Etiquetas de los valores" = etiquetas_valores,
    "Valores distintos" = valores_distintos
  )
}

# One metadata table per wave, built from the data as loaded.
meta.ola_1 <- ola_1 %>% get.meta()
meta.ola_2 <- ola_2 %>% get.meta()
meta.ola_3 <- ola_3 %>% get.meta()
meta.ola_4 <- ola_4 %>% get.meta()
meta.ola_5 <- ola_5 %>% get.meta()

## Convert to numeric.

# Coerce every column of `base` to numeric, except the columns whose class,
# as recorded in the get.meta() table `meta`, is exactly "character" or
# "Date".
#
# Args:
#   base: A data frame (one survey wave).
#   meta: The metadata table for `base`; its "Nombre" and "Clase" columns
#     are read.
#
# Returns:
#   `base` with the remaining columns passed through as.numeric().
convertir_a_numericas <- function(base, meta) {
  no_numericas <- meta$Nombre[meta$Clase %in% c("character", "Date")]
  base %>% mutate(across(-all_of(no_numericas), as.numeric))
}

# Wave 1: convert, then give the C-block variables descriptive names.
ola_1 <- ola_1 %>%
  convertir_a_numericas(meta.ola_1) %>%
  rename(
    educ = C3, ocupa = C4, ocupa_nom = C5_SQ001, tareas_nom = C5_SQ002,
    trab_ocup = C6, ingreso = C7, relig = C8, relig_other = C8_other,
    p_relig = C9
  )

# Wave 2: conversion only.
ola_2 <- ola_2 %>%
  convertir_a_numericas(meta.ola_2)

# Wave 3: convert, then split each timestamp column at the space into two
# columns (the original name plus a "TIME ..." column).
# NOTE(review): this wave is read with read.dta13(); if it returns labelled
# variables as factors, as.numeric() yields level codes rather than the
# original values - confirm against the data.
ola_3 <- ola_3 %>%
  convertir_a_numericas(meta.ola_3) %>%
  separate(SUBMITDATE, into = c("SUBMITDATE", "TIME SUBMIT"), sep = " ") %>%
  separate(STARTDATE, into = c("STARTDATE", "TIME START"), sep = " ") %>%
  separate(DATESTAMP, into = c("DATESTAMP", "TIME STAMP"), sep = " ")

# Wave 4: same treatment as Wave 3, and ID_OLA1 is renamed to FOLIO.
ola_4 <- ola_4 %>%
  convertir_a_numericas(meta.ola_4) %>%
  separate(SUBMITDATE, into = c("SUBMITDATE", "TIME SUBMIT"), sep = " ") %>%
  separate(STARTDATE, into = c("STARTDATE", "TIME START"), sep = " ") %>%
  separate(DATESTAMP, into = c("DATESTAMP", "TIME STAMP"), sep = " ") %>%
  rename(FOLIO = ID_OLA1)

# Wave 5: conversion only.
ola_5 <- ola_5 %>%
  convertir_a_numericas(meta.ola_5)

## Rename.

### 1. Every Wave 2 variable gets the suffix "_B", except FOLIO,
### EDAD_PANELISTA and the columns matched by the selection helpers below.
### The date/time columns are then lower-cased.

ola_2 <- ola_2 %>%
  rename_with(
    function(x) paste0(x, "_B"),
    .cols = -c(FOLIO, EDAD_PANELISTA,
               starts_with(c("NOMBRE", "COD", "C", "DATE", "TIME")),
               ends_with("DATE"))
  ) %>%
  rename_with(
    tolower,
    .cols = c(starts_with(c("DATE", "TIME")), ends_with("DATE"))
  )

### The variables are then harmonised to the Wave 1 names.

ola_2 <- ola_2 %>%
  rename(
    P14 = P26b_B,
    P15_SQ001 = P27b_SQ001_B,
    P16 = P28_B,
    P17 = P29_B,
    P18_SQ001 = P31_SQ001_B, P18_SQ002 = P31_SQ002_B, P18_SQ003 = P31_SQ003_B,
    P19_SQ001 = P32_SQ001_B, P19_SQ002 = P32_SQ002_B, P19_SQ003 = P32_SQ003_B,
    P19_SQ004 = P32_SQ004_B, P19_SQ005 = P32_SQ005_B, P19_SQ006 = P32_SQ006_B,
    P21_SQ001 = P33_SQ001_B, P21_SQ002 = P33_SQ002_B, P21_SQ003 = P33_SQ003_B,
    P22 = P34_B,
    P24 = P36_B,
    P25 = P37_B,
    P26_SQ001 = P38_SQ001_B, P26_SQ002 = P38_SQ002_B, P26_SQ003 = P38_SQ003_B,
    P26_SQ004 = P38_SQ004_B, P26_SQ005 = P38_SQ005_B,
    jef_hog = C1,
    ed_jefe = C2,
    prev = C4,
    estab_educa = C5,
    hijo_hno_esc = C6,
    estab_educa_hijo_hmno = C7,
    retiro_10 = C8_SQ001, retiro_10_2 = C8_SQ002, benef_estado = C8_SQ003,
    nac_ext = C9,
    covid_fam = C10
  )

# Wave 3: append "_C" to every variable except the identifiers and the
# columns matched by the selection helpers, lower-case the attribute and
# date/time columns, then harmonise the question names.
ola_3 <- ola_3 %>%
  rename_with(
    function(x) paste0(x, "_C"),
    .cols = -c(
      FOLIO, EDAD_PANELISTA, ID,
      starts_with(c("NOMBRE", "COD", "C", "ATTRIBUTE_", "DATE", "TIME")),
      ends_with("DATE")
    )
  ) %>%
  rename_with(
    tolower,
    .cols = c(starts_with(c("ATTRIBUTE_", "DATE", "TIME")), ends_with("DATE"))
  ) %>%
  rename(
    P2 = P2_C,
    P11_SQ001 = P12_1_C, P11_SQ002 = P12_2_C, P11_SQ003 = P12_3_C,
    P14 = P13_C,
    P15_SQ001 = P14_1_C,
    P18B = P17C_C,
    P18_SQ001 = P21_1_C, P18_SQ002 = P21_2_C, P18_SQ003 = P21_3_C,
    P19_SQ001 = P22_1_C, P19_SQ002 = P22_2_C, P19_SQ003 = P22_3_C,
    P19_SQ004 = P22_4_C, P19_SQ005 = P22_5_C, P19_SQ006 = P22_6_C,
    P19_SQ007 = P22_7_C,
    P21_SQ001 = P24_1_C, P21_SQ002 = P24_2_C, P21_SQ003 = P24_3_C,
    P22 = P26_C,
    P24 = P27_C,
    P25 = P28_C,
    P26_SQ001 = P29_1_C, P26_SQ002 = P29_2_C, P26_SQ003 = P29_3_C,
    P26_SQ004 = P29_4_C, P26_SQ005 = P29_5_C,
    P27_SQ001 = P30_1_C, P27_SQ002 = P30_2_C, P27_SQ003 = P30_3_C,
    P27_SQ004 = P30_4_C, P27_SQ005 = P30_5_C,
    sexo = C1,
    edad = C2,
    educ = C3,
    ocupa = C4,
    trab_ocup = C6,
    ingreso = C7,
    n_personas_hogar = C8
  )

# Wave 4: append "_D" to every variable except FOLIO, EDAD_PANELISTA and the
# columns matched by the selection helpers, then lower-case the date/time
# columns.
ola_4 <- ola_4 %>%
  rename_with(
    function(x) paste0(x, "_D"),
    .cols = -c(FOLIO, EDAD_PANELISTA,
               starts_with(c("NOMBRE", "COD", "C", "DATE", "TIME")),
               ends_with("DATE"))
  ) %>%
  rename_with(
    tolower,
    .cols = c(starts_with(c("DATE", "TIME")), ends_with("DATE"))
  )

# Harmonise the Wave 4 names with the names used for earlier waves.
ola_4 <- ola_4 %>%
  rename(
    P4a_B = P4A_D,
    P5_B = P5_D,
    P6_SQ001_B = P6_1_D, P6_SQ002_B = P6_2_D, P6_SQ003_B = P6_3_D,
    P6_SQ004_B = P6_4_D, P6_SQ005_B = P6_5_D, P6_SQ006_B = P6_6_D,
    P3b_B = P3B_D,
    P4b_B = P4B_D,
    P27_1_B = P8_1_D, P27_2_B = P8_2_D, P27_3_B = P8_3_D, P27_4_B = P8_4_D,
    P27_5_B = P8_5_D, P27_6_B = P8_6_D, P27_7_B = P8_7_D,
    P25b_1_B = P25_1_D, P25b_2_B = P25_2_D, P25b_3_B = P25_3_D,
    P25b_4_B = P25_4_D, P25b_5_B = P25_5_D, P25b_6_B = P25_6_D,
    P14 = P26_D,
    P15_SQ001 = P27_1_D,
    P16 = P28_D,
    P17 = P29_D,
    P17_otros = P29_OTHER_D,
    P30_B = P30_D,
    P18_SQ001 = P31_1_D, P18_SQ002 = P31_2_D, P18_SQ003 = P31_3_D,
    P19_SQ001 = P32_1_D, P19_SQ002 = P32_2_D, P19_SQ003 = P32_3_D,
    P19_SQ004 = P32_4_D, P19_SQ005 = P32_5_D, P19_SQ006 = P32_6_D,
    P19_SQ007 = P32_7_D,
    P21_SQ001 = P33_1_D, P21_SQ002 = P33_2_D, P21_SQ003 = P33_3_D,
    P22 = P34_D,
    P35_1_B = P35_1_D, P35_2_B = P35_2_D, P35_3_B = P35_3_D,
    P35_4_B = P35_4_D, P35_5_B = P35_5_D, P35_6_B = P35_6_D,
    P24 = P36_D,
    P25 = P37_D,
    P26_SQ001 = P38_1_D, P26_SQ002 = P38_2_D, P26_SQ003 = P38_3_D,
    P26_SQ004 = P38_4_D, P26_SQ005 = P38_5_D,
    P40_1_B = P40_1_D, P40_2_B = P40_2_D, P40_3_B = P40_3_D,
    P40_4_B = P40_4_D, P40_5_B = P40_5_D, P40_6_B = P40_6_D,
    P31_1_C = P41_1_D, P31_2_C = P41_2_D,
    ed_jefe = C2,
    C3_SQ001 = C3_1, C3_SQ002 = C3_2, C3_SQ003 = C3_3,
    C3_SQ004 = C3_4, C3_SQ005 = C3_5, C3_SQ006 = C3_6,
    prev = C4,
    retiro_10 = C8_1, retiro_10_2 = C8_2, retiro_10_3 = C8_3,
    benef_estado = C8_4,
    nac_ext = C9,
    covid_fam = C10
  )

# Wave 5: append "_E" to every variable except FOLIO, EDAD_PANELISTA and the
# columns matched by the selection helpers.
ola_5 <- ola_5 %>%
  rename_with(
    function(x) paste0(x, "_E"),
    .cols = -c(FOLIO, EDAD_PANELISTA,
               starts_with(c("NOMBRE", "COD", "C", "date")),
               ends_with("date"))
  )

# Harmonise the Wave 5 names with the names used for earlier waves.
ola_5 <- ola_5 %>%
  rename(
    P6_SQ001_B = P3_1_E, P6_SQ002_B = P3_2_E, P6_SQ003_B = P3_3_E,
    P6_SQ004_B = P3_4_E,
    P27_1_B = P10_1_E, P27_2_B = P10_2_E, P27_3_B = P10_3_E,
    P27_4_B = P10_4_E, P27_5_B = P10_5_E, P27_6_B = P10_6_E,
    # NOTE(review): item 7 maps to P27_8_B, not P27_7_B - confirm this is
    # intentional.
    P27_8_B = P10_7_E,
    P14 = P11_E,
    P15_SQ001 = P12_1_E,
    P16 = P13_E,
    P17 = P14_E,
    P17_otros = P14_other_E,
    P30_B = P15_E,
    P18_SQ001 = P16_1_E, P18_SQ002 = P16_2_E, P18_SQ003 = P16_3_E,
    # NOTE(review): P19_SQ003 has no Wave 5 source here (P17_3_E goes to
    # P19_SQ004) - confirm against the questionnaire.
    P19_SQ001 = P17_1_E, P19_SQ002 = P17_2_E,
    P19_SQ004 = P17_3_E, P19_SQ005 = P17_4_E, P19_SQ006 = P17_5_E,
    # NOTE(review): P33_*_E falls outside the P3-P20 numbering of the other
    # Wave 5 sources in this list - confirm these columns exist.
    P21_SQ001 = P33_1_E, P21_SQ002 = P33_2_E, P21_SQ003 = P33_3_E,
    P22 = P18_E,
    P24 = P19_E,
    P25 = P20_E
  )

## Save each wave as its own .Rdata file; every file stores a single object
## under its wave name (ola_1 ... ola_5).

save(list = "ola_1", file = paste0(ruta, "2. Database/PRE MAYO.Rdata"))
save(list = "ola_2", file = paste0(ruta, "2. Database/POST MAYO.Rdata"))
save(list = "ola_3", file = paste0(ruta, "2. Database/PRE NOV.Rdata"))
save(list = "ola_4", file = paste0(ruta, "2. Database/POST NOV.Rdata"))
save(list = "ola_5", file = paste0(ruta, "2. Database/Segunda Vuelta.Rdata"))